Setup

Load MOFA model RNA and proteome data

# Load the MOFA model
model_path <- "../Results/MOFA_models/mofa_cptac_pdac_rna_proteome_model.hdf5"
MOFAmodel <- load_model(model_path)
summary(MOFAmodel)
## Length  Class   Mode 
##      1   MOFA     S4

Load MOFA model RNA, proteome, and mutation data

# Load the MOFA model
model_path_v2 <- "../Results/MOFA_models/mofa_cptac_pdac_rna_proteome_mutations_model.hdf5"
MOFAmodel_2 <- load_model(model_path_v2)
summary(MOFAmodel_2)
## Length  Class   Mode 
##      1   MOFA     S4

Variance explained by factors and views

# Get variance explained
var_explained <- get_variance_explained(MOFAmodel, as.data.frame = TRUE)
print(var_explained)
## $r2_per_factor
##     group    view  factor       value
## 1  group1     RNA Factor1 14.87849951
## 2  group1     RNA Factor2  8.23851824
## 3  group1     RNA Factor3  1.04650855
## 4  group1     RNA Factor4  9.14654732
## 5  group1     RNA Factor5 11.66033149
## 6  group1     RNA Factor6  3.42051387
## 7  group1 Protein Factor1  0.00140667
## 8  group1 Protein Factor2  6.49857521
## 9  group1 Protein Factor3 12.91454434
## 10 group1 Protein Factor4  3.38932872
## 11 group1 Protein Factor5  0.62047243
## 12 group1 Protein Factor6  5.44626117
## 
## $r2_total
##    group    view    value
## 1 group1     RNA 47.17320
## 2 group1 Protein 28.57813
var_explained2 <- get_variance_explained(MOFAmodel_2, as.data.frame = TRUE)
print(var_explained2)
## $r2_per_factor
##     group     view  factor        value
## 1  group1      RNA Factor1 14.872688055
## 2  group1      RNA Factor2  8.255583048
## 3  group1      RNA Factor3  1.048505306
## 4  group1      RNA Factor4  9.111559391
## 5  group1      RNA Factor5 11.729437113
## 6  group1      RNA Factor6  3.402471542
## 7  group1 Mutation Factor1  0.002831221
## 8  group1 Mutation Factor2  0.004106760
## 9  group1 Mutation Factor3  0.005424023
## 10 group1 Mutation Factor4  0.003153086
## 11 group1 Mutation Factor5  0.003492832
## 12 group1 Mutation Factor6  0.002533197
## 13 group1  Protein Factor1  0.001335144
## 14 group1  Protein Factor2  6.530237198
## 15 group1  Protein Factor3 12.920129299
## 16 group1  Protein Factor4  3.396552801
## 17 group1  Protein Factor5  0.600993633
## 18 group1  Protein Factor6  5.399763584
## 
## $r2_total
##    group     view       value
## 1 group1      RNA 47.17723131
## 2 group1 Mutation  0.02154708
## 3 group1  Protein 28.56839895
# Plot variance explained by each factor for each view
plot_variance_explained(MOFAmodel)

# Plot correlations between factors
plot_factor_cor(MOFAmodel, factors = 1:6)
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "factors" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "factors" is not a graphical parameter
## Warning in title(title, ...): "factors" is not a graphical parameter

# print factor correlations as a matrix
factors_mat <- get_factors(MOFAmodel)[[1]]
factor_cor_matrix <- cor(factors_mat)
print(round(factor_cor_matrix, 2))
##         Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## Factor1    1.00   -0.20   -0.23   -0.07   -0.08   -0.21
## Factor2   -0.20    1.00    0.01    0.00    0.06    0.05
## Factor3   -0.23    0.01    1.00    0.07    0.24    0.12
## Factor4   -0.07    0.00    0.07    1.00   -0.02    0.06
## Factor5   -0.08    0.06    0.24   -0.02    1.00    0.01
## Factor6   -0.21    0.05    0.12    0.06    0.01    1.00

Load clinical metadata

clinical_df <- read.csv("../Data/Metadata/metadata_merged_clinical_mol-phenotype_from_paper.csv", 
                        row.names = 1, 
                        stringsAsFactors = FALSE,
                        na.strings = c("", " ", "NA", "N/A", "na", "null", "NULL", "NaN"))
cat("Before replacement:\n")
## Before replacement:
print(head(rownames(clinical_df)))
## [1] "C3L-00102" "C3L-00189" "C3L-00277" "C3L-00401" "C3L-00640" "C3L-00819"
# replace '-' with '.' in case IDs to match sample IDs in MOFA data
rownames(clinical_df) <- gsub("-", ".", rownames(clinical_df))

cat("\nAfter replacement:\n")
## 
## After replacement:
print(head(rownames(clinical_df)))
## [1] "C3L.00102" "C3L.00189" "C3L.00277" "C3L.00401" "C3L.00640" "C3L.00819"
print(dim(clinical_df))
## [1] 140  94

Append clinical metadata to MOFA sample_metadata

# Get sample names from MOFA model
mofa_samples <- unlist(samples_names(MOFAmodel))
cat("Number of MOFA samples:", length(mofa_samples), "\n")
## Number of MOFA samples: 140
cat("First MOFA samples:\n")
## First MOFA samples:
print(head(mofa_samples))
##     group11     group12     group13     group14     group15     group16 
## "C3N.03884" "C3L.04027" "C3N.04283" "C3L.04475" "C3N.00517" "C3L.01037"
cat("\nNumber of clinical samples:", nrow(clinical_df), "\n")
## 
## Number of clinical samples: 140
cat("First clinical samples:\n")
## First clinical samples:
print(head(rownames(clinical_df)))
## [1] "C3L.00102" "C3L.00189" "C3L.00277" "C3L.00401" "C3L.00640" "C3L.00819"
cat("\nNumber of matching samples:", sum(mofa_samples %in% rownames(clinical_df)), "\n")
## 
## Number of matching samples: 140
# Match and create metadata
idx <- match(mofa_samples, rownames(clinical_df))
sample_meta <- data.frame(sample = mofa_samples, stringsAsFactors = FALSE)
sample_meta <- cbind(sample_meta, clinical_df[idx, ])

samples_metadata(MOFAmodel) <- sample_meta
cat("\nAdded metadata to MOFAmodel\n")
## 
## Added metadata to MOFAmodel
print(dim(samples_metadata(MOFAmodel)))
## [1] 140  96
print(head(samples_metadata(MOFAmodel)[, 1:5]))
##            sample tumor_included_for_the_study normal_included_for_the_study
## group11 C3N.03884                          yes                           yes
## group12 C3L.04027                          yes                            no
## group13 C3N.04283                          yes                            no
## group14 C3L.04475                          yes                            no
## group15 C3N.00517                          yes                           yes
## group16 C3L.01037                          yes                           yes
##         histology_diagnosis age
## group11                PDAC  64
## group12                PDAC  69
## group13                PDAC  66
## group14                PDAC  77
## group15                PDAC  69
## group16                PDAC  42
# Print specific sample
cat("\n\nData for sample C3L.00401:\n")
## 
## 
## Data for sample C3L.00401:
specific_sample <- samples_metadata(MOFAmodel)[samples_metadata(MOFAmodel)$sample == "C3L.00401", ]
print(specific_sample)
##              sample tumor_included_for_the_study normal_included_for_the_study
## group1120 C3L.00401                          yes                           yes
##           histology_diagnosis age    sex race participant_country tumor_site
## group1120                PDAC  62 Female <NA>              Canada       body
##           tumor_focality tumor_size_cm tumor_necrosis lymph_vascular_invasion
## group1120       Unifocal           2.8 Not identified                 Present
##           perineural_invasion number_of_lymph_nodes_examined
## group1120             Present                             40
##           number_of_lymph_nodes_positive_for_tumor
## group1120                                        3
##           pathologic_staging_regional_lymph_nodes_pn
## group1120                                        pN1
##           pathologic_staging_primary_tumor_pt
## group1120                                 pT2
##           pathologic_staging_distant_metastasis_pm
## group1120                                      pM0
##           clinical_staging_distant_metastasis_cm         residual_tumor
## group1120                                    cM0 R0:  No residual tumor
##           tumor_stage_pathological additional_pathologic_findings   bmi
## group1120                Stage IIB           Chronic pancreatitis 22.42
##                                                                                                alcohol_consumption
## group1120 Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women
##                                                    tobacco_smoking_history
## group1120 Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime
##                                                      medical_condition
## group1120 Hysterectomy|kidney stones|varicose veins|jaundice|allergies
##           Neoplastic_cellularity Acinar_fraction Islet_fraction
## group1120               45;35;45          5;10;5          2;2;3
##           Stromal_fraction Non_neoplastic_duct Fat_fraction
## group1120         35;30;32             3;10;10        0;0;0
##           Inflammation_fraction Muscle_fraction follow_up_days vital_status
## group1120               10;13;5           0;0;0           1228       Living
##           is_this_patient_lost_to_follow_up cause_of_death immune_deconv
## group1120                                No           <NA>     0.5183173
##           epithelial_cancer_deconv stromal_deconv
## group1120               0.09130988      0.2684147
##           mature_exocrine_endocrine_deconv
## group1120                        0.1219581
##           neoplastic_cellularity_histology_estimate acinar_histology_estimate
## group1120                                  0.400641                0.06410256
##           islet_histology_estimate stromal_histology_estimate
## group1120                0.0224359                  0.3108974
##           Non_neoplastic_duct_histology_estimate Fat_histology_estimate
## group1120                             0.07371795                      0
##           inflammation_histology_estimate Muscle_histology_estimate
## group1120                      0.08974359                         0
##           necrosis_..OF_TUMOR_WITH_NECROSIS._histology_estimate Bailey
## group1120                                            0.03846154   ADEX
##               Collisson   Moffitt StromalScore_ESTIMATE ImmuneScore_ESTIMATE
## group1120 exocrine-like CLASSICAL              8883.123             8339.309
##           mutation_count   KRAS_VAF chr9p_del  chr18q_del  chr17p_del   chrIdx
## group1120              8 0.04514673   -0.0345 -0.06218693 -0.05816441 1.052433
##           MSI_status Androgen_pathway EGFR_pathway Estrogen_pathway
## group1120        MSS       -0.3585243   0.03647063        0.4321429
##           Hypoxia_pathway JAK.STAT_pathway MAPK_pathway NFkB_pathway
## group1120      -0.5891487        0.5558351    0.1610661     1.136408
##           p53_pathway PI3K_pathway TGFb_pathway TNFa_pathway Trail_pathway
## group1120  -0.4217425    0.7678964     0.703444     1.043745      0.807452
##           VEGF_pathway WNT_pathway T.cells_MCPCounter CD8.T.cells_MCPCounter
## group1120   -0.1902289   0.7106079           7.299398               7.026461
##           Cytotoxic.lymphocytes_MCPCounter B.lineage_MCPCounter
## group1120                         6.386566                7.228
##           NK.cells_MCPCounter Monocytic.lineage_MCPCounter
## group1120            3.069608                      10.9729
##           Myeloid.dendritic.cells_MCPCounter Neutrophils_MCPCounter
## group1120                           6.956691               8.835254
##           Endothelial.cells_MCPCounter Fibroblasts_MCPCounter PFS_days
## group1120                     8.853753                14.6807      532
##           PFS_event OS_days OS_event PurIST_Subtype PurIST_Subtype_graded
## group1120         1    1262        1      classical      strong classical
##           ROIVolume  group
## group1120  7.290634 group1

Correlations between factors and clinical variables

# Correlate factors with clinical variables
# covariates can be specified as a vector of column names in the sample metadata
covariates <- c(
  "PurIST_Subtype", 
  "PurIST_Subtype_graded",
  "Moffitt", 
  "Bailey",
  "Collisson",
  "tumor_size_cm",
  "ROIVolume",
  "tumor_stage_pathological", 
  "pathologic_staging_primary_tumor_pt", 
  "Neoplastic_cellularity",
  "vital_status",
  "cause_of_death",
  "mutation_count",
  "KRAS_VAF",
  "PFS_days",
  "PFS_event",
  "OS_days",
  "OS_event"
)

cor_results <- correlate_factors_with_covariates(
  MOFAmodel, 
  covariates = covariates,
  plot = 'r'
)
## Warning in correlate_factors_with_covariates(MOFAmodel, covariates =
## covariates, : There are non-numeric values in the covariates data.frame,
## converting to numeric...

# Get the -log10(pval) matrix
log_pval_matrix <- correlate_factors_with_covariates(
  MOFAmodel, 
  covariates = covariates,
  plot = 'log_pval',
  return_data = TRUE
)
## Warning in correlate_factors_with_covariates(MOFAmodel, covariates =
## covariates, : There are non-numeric values in the covariates data.frame,
## converting to numeric...
# Get the correlation coefficient matrix
cor_matrix <- correlate_factors_with_covariates(
  MOFAmodel, 
  covariates = covariates,
  plot = 'r',
  return_data = TRUE
)
## Warning in correlate_factors_with_covariates(MOFAmodel, covariates =
## covariates, : There are non-numeric values in the covariates data.frame,
## converting to numeric...
cat("\n-log10(p-value) matrix:\n")
## 
## -log10(p-value) matrix:
print(log_pval_matrix)
##         PurIST_Subtype PurIST_Subtype_graded  Moffitt    Bailey Collisson
## Factor1       1.530404              2.981798 0.000000  2.021594  0.000000
## Factor2      10.161171              4.816827 5.613916 11.660218  3.224834
## Factor3       0.000000              0.000000 0.000000  6.307701  0.000000
## Factor4       0.000000              0.000000 0.000000  0.000000  0.000000
## Factor5       0.000000              0.000000 0.000000  0.000000  5.648640
## Factor6       1.860597              1.587491 2.968197  4.549611  0.000000
##         tumor_size_cm ROIVolume tumor_stage_pathological
## Factor1      0.000000    0.0000                        0
## Factor2      1.889456    1.3047                        0
## Factor3      2.429498    0.0000                        0
## Factor4      0.000000    0.0000                        0
## Factor5      0.000000    0.0000                        0
## Factor6      0.000000    0.0000                        0
##         pathologic_staging_primary_tumor_pt Neoplastic_cellularity vital_status
## Factor1                                   0               0.000000     0.000000
## Factor2                                   0               3.575025     1.605023
## Factor3                                   0               0.000000     0.000000
## Factor4                                   0               4.570112     0.000000
## Factor5                                   0               0.000000     2.507599
## Factor6                                   0               1.759262     0.000000
##         cause_of_death mutation_count KRAS_VAF PFS_days PFS_event  OS_days
## Factor1        0.00000       0.000000 0.000000 0.000000         0 0.000000
## Factor2        0.00000       7.877718 8.552830 2.197986         0 1.982166
## Factor3        0.00000       0.000000 3.195354 0.000000         0 0.000000
## Factor4        0.00000       0.000000 5.120652 0.000000         0 0.000000
## Factor5        1.36495       0.000000 0.000000 1.330565         0 0.000000
## Factor6        0.00000       1.802430 0.000000 0.000000         0 0.000000
##         OS_event
## Factor1 0.000000
## Factor2 0.000000
## Factor3 0.000000
## Factor4 0.000000
## Factor5 1.590967
## Factor6 0.000000
cat("\nCorrelation coefficient (r) matrix:\n")
## 
## Correlation coefficient (r) matrix:
print(round(cor_matrix, 3))
##         PurIST_Subtype PurIST_Subtype_graded Moffitt Bailey Collisson
## Factor1          0.184                 0.274   0.135 -0.232    -0.028
## Factor2         -0.516                -0.357  -0.407  0.578     0.290
## Factor3         -0.106                -0.094   0.006  0.434    -0.138
## Factor4         -0.007                 0.032  -0.002  0.051     0.049
## Factor5          0.122                 0.029   0.120  0.056    -0.391
## Factor6         -0.208                -0.188  -0.289  0.367     0.101
##         tumor_size_cm ROIVolume tumor_stage_pathological
## Factor1         0.001     0.110                    0.017
## Factor2         0.211     0.288                    0.119
## Factor3         0.245     0.161                   -0.041
## Factor4         0.139     0.232                   -0.042
## Factor5        -0.123    -0.180                    0.008
## Factor6         0.100    -0.101                    0.107
##         pathologic_staging_primary_tumor_pt Neoplastic_cellularity vital_status
## Factor1                              -0.044                 -0.056       -0.047
## Factor2                               0.144                  0.304       -0.192
## Factor3                               0.131                 -0.112        0.127
## Factor4                               0.034                 -0.347        0.078
## Factor5                               0.010                  0.014        0.252
## Factor6                               0.012                 -0.201       -0.110
##         cause_of_death mutation_count KRAS_VAF PFS_days PFS_event OS_days
## Factor1         -0.105          0.047   -0.102    0.071    -0.066  -0.010
## Factor2          0.014          0.458    0.484   -0.315     0.022  -0.217
## Factor3          0.055          0.067    0.290    0.098    -0.107   0.008
## Factor4          0.221          0.023   -0.375   -0.056     0.056   0.030
## Factor5          0.231          0.016    0.104    0.232    -0.049   0.131
## Factor6         -0.003         -0.204   -0.143    0.023     0.126  -0.048
##         OS_event
## Factor1    0.020
## Factor2    0.143
## Factor3   -0.086
## Factor4   -0.095
## Factor5   -0.190
## Factor6    0.140
# Convert to p-values
pval_matrix <- 10^(-log_pval_matrix)

cat("\nP-value matrix (raw):\n")
## 
## P-value matrix (raw):
print(pval_matrix)
##         PurIST_Subtype PurIST_Subtype_graded      Moffitt       Bailey
## Factor1   2.948465e-02          1.042802e-03 1.000000e+00 9.514929e-03
## Factor2   6.899688e-11          1.524662e-05 2.432675e-06 2.186664e-12
## Factor3   1.000000e+00          1.000000e+00 1.000000e+00 4.923786e-07
## Factor4   1.000000e+00          1.000000e+00 1.000000e+00 1.000000e+00
## Factor5   1.000000e+00          1.000000e+00 1.000000e+00 1.000000e+00
## Factor6   1.378487e-02          2.585286e-02 1.075977e-03 2.820910e-05
##            Collisson tumor_size_cm  ROIVolume tumor_stage_pathological
## Factor1 1.000000e+00   1.000000000 1.00000000                        1
## Factor2 5.958893e-04   0.012898656 0.04957922                        1
## Factor3 1.000000e+00   0.003719646 1.00000000                        1
## Factor4 1.000000e+00   1.000000000 1.00000000                        1
## Factor5 2.245741e-06   1.000000000 1.00000000                        1
## Factor6 1.000000e+00   1.000000000 1.00000000                        1
##         pathologic_staging_primary_tumor_pt Neoplastic_cellularity vital_status
## Factor1                                   1           1.000000e+00  1.000000000
## Factor2                                   1           2.660574e-04  0.024830041
## Factor3                                   1           1.000000e+00  1.000000000
## Factor4                                   1           2.690838e-05  1.000000000
## Factor5                                   1           1.000000e+00  0.003107427
## Factor6                                   1           1.740757e-02  1.000000000
##         cause_of_death mutation_count     KRAS_VAF    PFS_days PFS_event
## Factor1     1.00000000   1.000000e+00 1.000000e+00 1.000000000         1
## Factor2     1.00000000   1.325201e-08 2.800078e-09 0.006338907         1
## Factor3     1.00000000   1.000000e+00 6.377430e-04 1.000000000         1
## Factor4     1.00000000   1.000000e+00 7.574404e-06 1.000000000         1
## Factor5     0.04315687   1.000000e+00 1.000000e+00 0.046712735         1
## Factor6     1.00000000   1.576049e-02 1.000000e+00 1.000000000         1
##            OS_days   OS_event
## Factor1 1.00000000 1.00000000
## Factor2 0.01041918 1.00000000
## Factor3 1.00000000 1.00000000
## Factor4 1.00000000 1.00000000
## Factor5 1.00000000 0.02564677
## Factor6 1.00000000 1.00000000
# Apply FDR correction
pval_vector <- as.vector(pval_matrix)
padj_vector <- p.adjust(pval_vector, method = "fdr")
padj_matrix <- matrix(padj_vector, nrow = nrow(pval_matrix), ncol = ncol(pval_matrix),
                     dimnames = dimnames(pval_matrix))

cat("\nFDR-adjusted p-value matrix:\n")
## 
## FDR-adjusted p-value matrix:
print(padj_matrix)
##         PurIST_Subtype PurIST_Subtype_graded      Moffitt       Bailey
## Factor1   1.098049e-01          0.0072628436 1.000000e+00 5.138062e-02
## Factor2   3.725831e-09          0.0001829594 3.753271e-05 2.361598e-10
## Factor3   1.000000e+00          1.0000000000 1.000000e+00 1.063538e-05
## Factor4   1.000000e+00          1.0000000000 1.000000e+00 1.000000e+00
## Factor5   1.000000e+00          1.0000000000 1.000000e+00 1.000000e+00
## Factor6   6.472895e-02          0.0997181776 7.262844e-03 2.769621e-04
##            Collisson tumor_size_cm ROIVolume tumor_stage_pathological
## Factor1 1.000000e+00    1.00000000 1.0000000                        1
## Factor2 4.919732e-03    0.06332067 0.1673299                        1
## Factor3 1.000000e+00    0.02231787 1.0000000                        1
## Factor4 1.000000e+00    1.00000000 1.0000000                        1
## Factor5 3.753271e-05    1.00000000 1.0000000                        1
## Factor6 1.000000e+00    1.00000000 1.0000000                        1
##         pathologic_staging_primary_tumor_pt Neoplastic_cellularity vital_status
## Factor1                                   1           1.0000000000   1.00000000
## Factor2                                   1           0.0023945169   0.09971818
## Factor3                                   1           1.0000000000   1.00000000
## Factor4                                   1           0.0002769621   1.00000000
## Factor5                                   1           1.0000000000   0.01974130
## Factor6                                   1           0.0752007119   1.00000000
##         cause_of_death mutation_count     KRAS_VAF   PFS_days PFS_event
## Factor1      1.0000000   1.000000e+00 1.000000e+00 1.00000000         1
## Factor2      1.0000000   3.578044e-07 1.008028e-07 0.03603168         1
## Factor3      1.0000000   1.000000e+00 4.919732e-03 1.00000000         1
## Factor4      1.0000000   1.000000e+00 1.022545e-04 1.00000000         1
## Factor5      0.1553647   1.000000e+00 1.000000e+00 0.16274114         1
## Factor6      1.0000000   7.092220e-02 1.000000e+00 1.00000000         1
##            OS_days   OS_event
## Factor1 1.00000000 1.00000000
## Factor2 0.05358436 1.00000000
## Factor3 1.00000000 1.00000000
## Factor4 1.00000000 1.00000000
## Factor5 1.00000000 0.09971818
## Factor6 1.00000000 1.00000000
# Find significant correlations (FDR < 0.05)
sig_idx_fdr <- which(padj_matrix < 0.05, arr.ind = TRUE)
if(nrow(sig_idx_fdr) > 0) {
  cat("\nSignificant correlations (FDR < 0.05):\n")
  sig_df <- data.frame(
    Factor = rownames(padj_matrix)[sig_idx_fdr[,1]],
    Covariate = colnames(padj_matrix)[sig_idx_fdr[,2]],
    r = cor_matrix[sig_idx_fdr],
    pval = pval_matrix[sig_idx_fdr],
    padj_fdr = padj_matrix[sig_idx_fdr]
  )
  print(sig_df[order(sig_df$padj_fdr), ])
} else {
  cat("\nNo significant correlations at FDR < 0.05\n")
  cat("Top 10 by raw p-value:\n")
  # Find significant correlations by raw p-value
  sig_idx <- which(pval_matrix < 0.05, arr.ind = TRUE)
  if(nrow(sig_idx) > 0) {
    sig_df <- data.frame(
      Factor = rownames(pval_matrix)[sig_idx[,1]],
      Covariate = colnames(pval_matrix)[sig_idx[,2]],
      r = cor_matrix[sig_idx],
      pval = pval_matrix[sig_idx],
      padj_fdr = padj_matrix[sig_idx]
    )
    print(head(sig_df[order(sig_df$pval), ], 10))
  }
}
## 
## Significant correlations (FDR < 0.05):
##     Factor              Covariate          r         pval     padj_fdr
## 6  Factor2                 Bailey  0.5775291 2.186664e-12 2.361598e-10
## 1  Factor2         PurIST_Subtype -0.5158635 6.899688e-11 3.725831e-09
## 16 Factor2               KRAS_VAF  0.4836997 2.800078e-09 1.008028e-07
## 15 Factor2         mutation_count  0.4575024 1.325201e-08 3.578044e-07
## 7  Factor3                 Bailey  0.4335007 4.923786e-07 1.063538e-05
## 4  Factor2                Moffitt -0.4072715 2.432675e-06 3.753271e-05
## 10 Factor5              Collisson -0.3914481 2.245741e-06 3.753271e-05
## 18 Factor4               KRAS_VAF -0.3746620 7.574404e-06 1.022545e-04
## 3  Factor2  PurIST_Subtype_graded -0.3566407 1.524662e-05 1.829594e-04
## 8  Factor6                 Bailey  0.3665812 2.820910e-05 2.769621e-04
## 13 Factor4 Neoplastic_cellularity -0.3468414 2.690838e-05 2.769621e-04
## 12 Factor2 Neoplastic_cellularity  0.3035835 2.660574e-04 2.394517e-03
## 9  Factor2              Collisson  0.2896805 5.958893e-04 4.919732e-03
## 17 Factor3               KRAS_VAF  0.2902641 6.377430e-04 4.919732e-03
## 2  Factor1  PurIST_Subtype_graded  0.2742288 1.042802e-03 7.262844e-03
## 5  Factor6                Moffitt -0.2890991 1.075977e-03 7.262844e-03
## 14 Factor5           vital_status  0.2517788 3.107427e-03 1.974130e-02
## 11 Factor3          tumor_size_cm  0.2453947 3.719646e-03 2.231787e-02
## 19 Factor2               PFS_days -0.3145792 6.338907e-03 3.603168e-02

Scatterplots

# plot samples in factor space colored by clinical variables
# plot factos 2:4, 2:6, and 4:6
# color by categorical clinical variables - PurIST_Subtype, Moffitt, Bailey
# color by continuous clinical variables - mutation_count, KRAS_VAF, Neoplastic_cellularity, PFS_days
# there should be a total of 21 plots

# Ensure continuous variables are numeric
sample_meta <- samples_metadata(MOFAmodel)
sample_meta$Neoplastic_cellularity <- as.numeric(sample_meta$Neoplastic_cellularity)
## Warning: NAs introduced by coercion
sample_meta$mutation_count <- as.numeric(sample_meta$mutation_count)
sample_meta$KRAS_VAF <- as.numeric(sample_meta$KRAS_VAF)
sample_meta$PFS_days <- as.numeric(sample_meta$PFS_days)
samples_metadata(MOFAmodel) <- sample_meta

categorical_vars <- c("PurIST_Subtype", "Moffitt", "Bailey")
continuous_vars <- c("mutation_count", "KRAS_VAF", "Neoplastic_cellularity", "PFS_days")
factor_pairs <- list(c(2,4), c(2,6), c(4,6))
for(pair in factor_pairs) {
  for(var in categorical_vars) {
    p <- plot_factors(MOFAmodel, factors = pair, color_by = var, dot_size = 4) +
      ggtitle(paste("Factors", pair[1], "vs", pair[2], "colored by", var))
    print(p)
  }
  for(var in continuous_vars) {
    p <- plot_factors(MOFAmodel, factors = pair, color_by = var, dot_size = 4) +
      ggtitle(paste("Factors", pair[1], "vs", pair[2], "colored by", var))
    print(p)
  }
}

Visualize factor values colored by clinical variables

plot_factor(MOFAmodel, factors = c(2,6), color_by = "PurIST_Subtype",
            dot_size = 3, dodge = TRUE,
            add_violin = TRUE, violin_alpha = 0.25
)

plot_factor(MOFAmodel, factors = c(2,6), color_by = "Moffitt",
            dot_size = 3, dodge = TRUE,
            add_violin = TRUE, violin_alpha = 0.25
)

plot_top_weights(MOFAmodel,
  view = "Protein",
  factor = 2,
  nfeatures = 10
)

plot_top_weights(MOFAmodel,
  view = "RNA",
  factor = 2,
  nfeatures = 10
)

Heatmap of factor values

plot_data_heatmap(MOFAmodel,
  view = "Protein",           # view of interest
  factor = 2,                 # factor of interest
  features = 100,             # number of features to plot (they are selected by weight)
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "RNA",           
  factor = 2,             
  features = 100,          
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "Protein",           
  factor = 4,             
  features = 100,          
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "RNA",           
  factor = 4,             
  features = 100,          
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "Protein",           
  factor = 6,             
  features = 100,         
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "RNA",           
  factor = 6,             
  features = 100,         
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = c("PurIST_Subtype", "Moffitt", "Bailey", "vital_status")
)

plot_data_heatmap(MOFAmodel,
  view = "RNA",           # view of interest
  factor = 2,             # factor of interest
  features = 100,          # number of features to plot (they are selected by weight)
  cluster_rows = FALSE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = FALSE,
  annotation_samples = "tumor_stage_pathological"  # Can now use column name directly
)

UMAP, tSNE of factors

MOFAmodel <- run_umap(MOFAmodel, factors = c(2,4,6))
plot_dimred(MOFAmodel, method = "UMAP",color_by = "PurIST_Subtype", dot_size = 5)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the MOFA2 package.
##   Please report the issue at <https://github.com/bioFAM/MOFA2>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot_dimred(MOFAmodel, method = "UMAP",color_by = "PurIST_Subtype_graded", dot_size = 5)

plot_dimred(MOFAmodel, method = "UMAP",color_by = "Moffitt", dot_size = 5)

MOFAmodel <- run_tsne(MOFAmodel, factors = c(2,4,6))
plot_dimred(MOFAmodel, method = "TSNE",color_by = "PurIST_Subtype", dot_size = 5)

plot_dimred(MOFAmodel, method = "TSNE",color_by = "PurIST_Subtype_graded", dot_size = 5)

plot_dimred(MOFAmodel, method = "TSNE",color_by = "Moffitt", dot_size = 5)